clear all
capture log close
*log using "Cleaning_PreFinalSet.log", replace

* Main data pathway globals.
*   rawdata   : root folder of the raw register extracts
*   cleandata : folder where every intermediate and final file is written
global rawdata "S:\Project\DemoSos2\data2020"
global cleandata "S:\Project\DemoSos2\common\felles\JR_RG\DrVA\CleanData\"

* Create the clean-data folder BEFORE changing into it. The previous order
* (cd first, mkdir afterwards) aborted on a machine where the folder did not
* exist yet. -capture- swallows the error raised when it already exists.
capture mkdir "$cleandata"
cd "$cleandata"


**------------------------------------------------------------------------------*
* Relevant sample: gender, birth year/month, parents' ids
**------------------------------------------------------------------------------*
* Read only the needed columns from the fixed-information register.
* The path is built from $rawdata so that it stays consistent with the other
* sections of this file that read from the same folder.
use lopenr mor_lnr far_lnr kjoenn foedselsaar fodtmnd invkat ///
	using "$rawdata\Befolkn\Demogr\faste_oppl", clear
rename (mor_lnr far_lnr foedselsaar kjoenn fodtmnd) (lopenr_mor lopenr_far byr sex bmo)

* Keep only persons with a known own id and a known mother id
drop if lopenr_mor == ""
drop if lopenr == ""

* Indicator equal to 1 when the immigration category is "A", 0 otherwise
* (a missing invkat is therefore coded 0).
* NOTE(review): confirm against the register documentation that category "A"
* is the intended definition of "born in Norway".
gen NORborn = (invkat == "A")
drop invkat
save "sample.dta", replace


**------------------------------------------------------------------------------*
* Municipality of residence (from PREP_sample)
**------------------------------------------------------------------------------*
use "$rawdata\Befolkn\Demogr\tidspunktbestemte_var.dta", clear
sort lopenr aar
* Save temp
save "temp_municipalities.dta", replace

* Attach the yearly records to the sample (one row per person-year afterwards;
* persons without any yearly record keep a single row with aar missing).
use "sample.dta", clear
merge 1:m lopenr using "temp_municipalities.dta"
drop if _merge == 2
drop _merge

* Municipality of residence is measured on 1 January of each year, so it is
* not registered in the birth year itself: drop records up to the birth year.
drop if aar <= byr
save "temp_background.dta", replace

rename bostedskommune bokomm

* Municipality at birth.
* The previous condition (aar == byr) could never be true because all rows
* with aar <= byr were dropped above, which left fodekom missing for everyone.
* The first available measurement is 1 January of the year after birth.
* NOTE(review): confirm that the first 1 January after birth is the intended
* definition of the birth municipality.
gen tmp = bokomm if aar == byr + 1
bysort lopenr: egen fodekom = max(tmp)
drop tmp

* Municipality in each calendar year: one column bokomYYYY per year in the data
levelsof aar, local(levels)
foreach yr of local levels {
	gen tmp`yr' = bokomm if aar == `yr'
	bysort lopenr: egen bokom`yr' = max(tmp`yr')
	drop tmp`yr'
}

* Keep one line per individual
drop aar bokomm
bysort lopenr: keep if _n == 1
codebook lopenr // 622,538 (count noted by the original author)
save "sample.dta", replace


**------------------------------------------------------------------------------*
* Siblings and birth order (from PREP_sample)
**------------------------------------------------------------------------------*
use "$rawdata\Befolkn\Demogr\faste_oppl.dta", clear
rename (mor_lnr far_lnr foedselsaar) (lopenr_mor lopenr_far byr)
drop if lopenr_mor == ""
drop if lopenr == ""

* Birth order among the mother's children, by birth year.
* lopenr is added as a tie-breaker: Stata orders ties randomly, so children of
* the same mother born in the same year would otherwise receive a different
* birth order from run to run.
* NOTE(review): the tie-break is only for reproducibility; consider ordering
* on birth month (fodtmnd) if its coding allows it.
bysort lopenr_mor (byr lopenr): gen birthorder = _n
label variable birthorder "Birthorder"

* Number of siblings = mother's total number of children minus the child itself
* (_N is at least 1 inside a by-group, so no guard is needed)
bysort lopenr_mor: gen siblings = _N - 1
label variable siblings "Number of siblings"

* Number of older siblings = position in the birth order minus one
gen oldersiblings = birthorder - 1
label variable oldersiblings "Older siblings"

* Year of the mother's first birth = earliest birth year among her children
bysort lopenr_mor: egen byr_birth1 = min(byr)

save "temp_siblings.dta", replace

**------------------------------------------------------------------------------*
* Parent background characteristics
**------------------------------------------------------------------------------*

* Education: stack the 1990-2019 extract and the 1970-1989 extract into one file
use "$rawdata\Utd\utdanning_90_19.dta", clear
append using "$rawdata\Utd\utdanning_70_89.dta"
save "Uutdanning.dta", replace

* Civil status: stack the 1975-1989 extract and the 1990-2020 extract,
* then convert the status code to numeric where possible
use "$rawdata\Befolkn\Demogr\sivilstand_1975_1989.dta", clear
append using "$rawdata\Befolkn\Demogr\sivilstand_1990_2020.dta"
destring sivilstand, replace
save "sivilstand.dta", replace

/*** Marital status codes (original Norwegian term in parentheses) ***/
	/*
	1 Unmarried (Ugift)
	2 Married (Gift)
	3 Widow/widower (Enke/enkemann)
	4 Divorced (Skilt)
	5 Separated (Separert)
	6 Registered partner (Reg partner; since 1993?)
	7 Separated partner (Separert partner)
	8 Divorced partner (Skilt partner)
	9 Surviving partner (Gjenlevende partner)
	*/

*** Mothers
	* Birth year and immigration category: one row per mother id in the sample
	use "sample.dta", clear
	keep lopenr_mor
	rename lopenr_mor lopenr
	bysort lopenr: keep if _n == 1

	* Look the mother up in the fixed-information register; only mothers
	* found there are kept
	merge 1:1 lopenr using "$rawdata\Befolkn\Demogr\faste_oppl.dta", keepusing(mor_lnr far_lnr foedselsaar invkat)
	keep if _merge == 3
	drop _merge
	rename (mor_lnr far_lnr foedselsaar) (lopenr_mor lopenr_far byr_mom)

	keep lopenr invkat byr_mom
	generate NORborn_mom = (invkat == "A")
	drop invkat
	save "temp_mom.dta", replace

	* Marital status: attach every civil-status record of each mother
	* (mothers without any record keep one row)
	keep lopenr
	merge 1:m lopenr using "sivilstand.dta", keep(master match) nogenerate

	* Bring the fixed characteristics back onto every mother-year row
	merge m:1 lopenr using "temp_mom.dta", nogenerate
	rename lopenr lopenr_mor
	save "temp_mom.dta", replace



*** Fathers
	* Birth year and immigration category: one row per non-missing father id
	use "sample.dta", clear
	keep lopenr_far
	rename lopenr_far lopenr
	drop if lopenr == ""
	bysort lopenr: keep if _n == 1

	merge 1:1 lopenr using "$rawdata\Befolkn\Demogr\faste_oppl.dta", keepusing(mor_lnr far_lnr foedselsaar invkat)
	rename (mor_lnr far_lnr foedselsaar) (lopenr_mor lopenr_far byr_dad)
	* Keep only fathers found in the register, exactly as is done for mothers.
	* Previously unmatched fathers (_merge==1) were kept with an empty invkat
	* and therefore coded NORborn_dad = 0 instead of missing; their children
	* now get missing father characteristics after the later m:1 merge.
	keep if _merge == 3
	drop _merge

	keep lopenr invkat byr_dad
	gen NORborn_dad = (invkat == "A")
	drop invkat
	rename lopenr lopenr_far
	save "temp_dad.dta", replace


/*** Parents' education ***/
* Builds temp_BU_mom.dta and temp_BU_dad.dta: one row per parent-year with the
* education code (bu_mom / bu_dad) and its first character (educ_level_*).
* Mother and father used to be two copies of the same code; they now share a
* single loop. Each pair below is "<Norwegian id suffix> <English suffix>".
* Removed relative to the earlier version (neither affected the output):
*   - a scratch panel file with a non-ASCII name that was saved and then
*     erased without ever being read back;
*   - a destring of igang, which is dropped a few lines later.
foreach pair in "mor mom" "far dad" {
	local no : word 1 of `pair'
	local en : word 2 of `pair'

	* One row per parent id found in the sample
	use "sample.dta", clear
	keep lopenr_`no'
	rename lopenr_`no' lopenr
	drop if lopenr == ""
	bysort lopenr: keep if _n == 1

	* Attach all education records (parents without records keep one row)
	merge 1:m lopenr using "Uutdanning.dta"
	drop if _merge == 2
	drop _merge
	sort lopenr aar

	* First character of the education code is kept as a string, taken
	* before BU is converted to numeric
	gen educ_level_`en' = substr(BU, 1, 1)
	label variable educ_level_`en' "completed educ, NUS2000:1.s Nivå"
	destring BU, replace
	rename BU bu_`en'

	drop igang bu_nivaa bu_gruppe
	rename lopenr lopenr_`no'
	save "temp_BU_`en'.dta", replace
}



* Merge of sample prep (from PREP_sample)
* Result: sample_background2.dta, one row per child with sibling information
* and parents' characteristics in wide (one column per year) form.
use "sample.dta", clear
drop if lopenr == ""

* Siblings
merge 1:1 lopenr using "temp_siblings.dta"
keep if _merge == 3
drop _merge

* Parents' background characteristics
* Dad
merge m:1 lopenr_far using "temp_dad.dta"
drop if _merge == 2
drop _merge
save "temp1.dta", replace

* Mom: civil status reshaped to one column per year (civ_momYYYY)
use "temp_mom.dta", clear
rename sivilstand civ_mom
* reshape cannot handle a missing j value.
* NOTE(review): mothers without any civil-status record have aar missing and
* are removed here, so their byr_mom / NORborn_mom are lost as well.
drop if aar == .
quietly reshape wide civ_mom, i(lopenr_mor) j(aar)
merge 1:m lopenr_mor using "temp1.dta"
* Rows that exist only in the parent file carry no child id; drop them so
* they cannot survive as an empty-lopenr row in the child-level file.
drop if _merge == 1
drop _merge
save "temp2.dta", replace

* Parents' education
* Mom: the education variable is called bu_mom, so that full name must be the
* reshape stub (the bare stub "bu" is not a variable in this file).
use "temp_BU_mom.dta", clear
drop if aar == .
quietly reshape wide bu_mom educ_level_mom, i(lopenr_mor) j(aar)
merge 1:m lopenr_mor using "temp2.dta"
drop if _merge == 1
drop _merge
save "temp3.dta", replace

* Dad: same fix for the stub (bu_dad)
use "temp_BU_dad.dta", clear
drop if aar == .
quietly reshape wide bu_dad educ_level_dad, i(lopenr_far) j(aar)
merge 1:m lopenr_far using "temp3.dta"
drop if _merge == 1
drop _merge
save "temp4.dta", replace

label variable byr "Birth year child"
label variable sex "Gender (1=male)"
label variable NORborn "Born in Norway"
label variable byr_mom "Mother's birth year"
label variable NORborn_mom "Mother born in Norway"
label variable byr_dad "Father's birth year"
label variable NORborn_dad "Father born in Norway"
label variable byr_birth1 "Birth year mother's first child"

* Safety net: one row per child
bysort lopenr: keep if _n == 1

save sample_background2, replace


**------------------------------------------------------------------------------*
* Income/Welfare/Work
**------------------------------------------------------------------------------*
* Extract the needed columns from each yearly income file.
* (The deprecated -for- command is replaced by forvalues/foreach throughout.)
forvalues yr = 1993/2020 {
	use lopenr aar wyrkinnt saminnt wnarinnt sos_stonad aap arbled uforetrygd folketrygd woverfor wskpl_overf wskfrie_overf sykepenger ///
		using "$rawdata\SSB2022\Inntekt\Inntekt`yr'.dta", clear
	rename aar year
	save "income`yr'", replace
}

* Stack the yearly extracts and remove the per-year scratch files
use income1993, clear
forvalues yr = 1994/2020 {
	append using "income`yr'.dta"
}
compress
forvalues yr = 1993/2020 {
	erase "income`yr'.dta"
}

* Attach the price index (cpi.dta must already exist in the working folder);
* years without an index value are dropped
merge m:1 year using cpi
keep if _merge == 3
drop _merge
gen aux = cpi/100
rename saminnt tot_income
rename wyrkinnt lab_income

* Any self-employment income (missing when the source amount is missing)
gen dself_inc = wnarinnt > 0 if wnarinnt != .
label variable dself_inc "Any Self Emp Income"

* G amount by year, used to scale labour income.
* NOTE(review): verify these values against the official table.
gen     G=35033 if year == 1991
replace G=36167 if year == 1992
replace G=37033 if year == 1993
replace G=37820 if year == 1994
replace G=38847 if year == 1995
replace G=40410 if year == 1996
replace G=42000 if year == 1997
replace G=44413 if year == 1998
replace G=46423 if year == 1999
replace G=48377 if year == 2000
replace G=50603 if year == 2001
replace G=53233 if year == 2002
replace G=55964 if year == 2003
replace G=58139 if year == 2004
replace G=60059 if year == 2005
replace G=62161 if year == 2006
replace G=65505 if year == 2007
replace G=69108 if year == 2008
replace G=72006 if year == 2009
replace G=74721 if year == 2010
replace G=78024 if year == 2011
replace G=81153 if year == 2012
replace G=84204 if year == 2013
replace G=87328 if year == 2014
replace G=89502 if year == 2015
replace G=91740 if year == 2016
replace G=93281 if year == 2017
replace G=95800 if year == 2018
replace G=98866 if year == 2019
replace G=100853 if year == 2020

* Labour income in units of G: computed BEFORE deflating, so both numerator
* and denominator are nominal amounts of the same year
gen lab_incG = lab_income/G

* Deflate all monetary amounts by cpi/100
foreach v of varlist tot_income lab_income sos_stonad aap arbled uforetrygd folketrygd woverfor wskpl_overf wskfrie_overf sykepenger {
	replace `v' = `v'/aux
}
drop aux cpi

** Social help
gen insoc = (sos_stonad != . & sos_stonad != 0)
label variable insoc "Social help"

** Unemployment benefits
gen unempben = (arbled != . & arbled != 0)
label variable unempben "Unemployment benefits"

** Disability
gen di = (uforetrygd != . & uforetrygd != 0)
label variable di "Disability income"

** Work assessment allowance (arbeidsavklaringspenger, aap)
gen daap = (aap != . & aap != 0)
label variable daap "arbeidsavklaringspenger"

** Sick pay
gen sl_money = sykepenger != . & sykepenger != 0

* Sum of the four indicators above, so it ranges from 0 to 4.
* NOTE(review): the label reads like a 0/1 indicator; confirm the intent.
gen innav = unempben + insoc + di + daap
label variable innav "in welfare"

quietly compress

* Drop person-years that occur more than once. The previous test (n == 2)
* removed exact pairs only and silently kept person-years that appear three
* or more times.
bysort lopenr year: gen n = _N
drop if n > 1
drop n
save income_datanew, replace

**------------------------------------------------------------------------------*
* Income/Welfare/Work - R1 2nd round
**------------------------------------------------------------------------------*
* Additional benefit types, stored only as "any amount received" indicators.
local benefits alderspensj_folketr uforetrygd tjenestepensjon_afp barnetrygd bostotte studiestipend forsorgerfradrag grunn_hjelp kontantstotte sykepenger

* Extract the needed columns from each yearly income file
forvalues yr = 1993/2020 {
	use lopenr `benefits' aar using "$rawdata\SSB2022\Inntekt\Inntekt`yr'.dta", clear
	rename aar year
	save "income`yr'", replace
}

* Stack the yearly extracts and remove the per-year scratch files
use income1993, clear
forvalues yr = 1994/2020 {
	append using "income`yr'.dta"
}
compress
forvalues yr = 1993/2020 {
	erase "income`yr'.dta"
}

* d<benefit> = 1 if the amount is positive, 0 otherwise, missing if the
* amount is missing; the amounts themselves are then dropped
foreach v of local benefits {
	gen d`v' = `v' > 0 if `v' != .
}
drop `benefits'

quietly compress

* Drop person-years that occur more than once. The previous test (n == 2)
* removed exact pairs only and kept person-years appearing three or more times.
bysort lopenr year: gen n = _N
drop if n > 1
drop n
save welfare_extra, replace


**------------------------------------------------------------------------------*
* Sick leave
**------------------------------------------------------------------------------*
* Result: sick_leave.dta, one row per person and start year with the days of
* sick leave per diagnosis letter (diag_<letter>) and in total (total_days_sl).
use "$rawdata\NAV\sykep_1989_2019.dta", clear

* Spell length in days, year in which the spell starts, and first character
* of the diagnosis code
gen days = SPTOM - ARBUF
gen year_start = year(ARBUF)
gen diag_sl = substr(DIAGNOSE, 1, 1)
keep lopenr days year_start diag_sl

* Keep spells starting in 1989 or later. A missing year is explicitly
* excluded: Stata treats missing as larger than any number, so the bare
* test year_start >= 1989 used to keep spells without a start date.
keep if year_start >= 1989 & !missing(year_start)

* Lower-case "l" is recoded to "L"
replace diag_sl = "L" if diag_sl == "l"

* Total days per person, start year and diagnosis letter
collapse (sum) days, by(lopenr year_start diag_sl)

* Spread each diagnosis letter into its own column within person-year.
* NOTE(review): the list has no "W" and other lower-case letters are not
* recoded; days under such codes count only towards total_days_sl. Confirm
* this is intended.
foreach k in 0 A B C D E F G H I J K L M N O P Q R S T U V X Y Z {
	gen aux = days if diag_sl == "`k'"
	bysort lopenr year_start: egen diag_`k' = max(aux)
	drop aux
}
bysort lopenr year_start: egen total_days_sl = total(days)

* Collapse to one row per person-year
drop diag_sl days
duplicates drop
rename year_start year

save sick_leave, replace

************************************************************************
* PREPARE GP DATA
************************************************************************
* GP characteristics (from PREP_GP)
* The source file holds more information than is kept here.
use "$rawdata\KUHR\FastLegeInfo.dta", clear
rename (BehandlerID legekjønn legealder listelengde listetak allmennspes aar) ///
	(gpid gender_gp age_gp list_gp maxlist_gp spec_gp year)

* Records without a GP id cannot be linked
drop if gpid == .

* Male indicator: 1 for "M", 0 for "K", missing for any other value
generate male_gp = cond(gender_gp == "M", 1, cond(gender_gp == "K", 0, .))
drop gender_gp

* A missing specialist flag is set to 0
replace spec_gp = 0 if spec_gp == .

save gp_charateristics, replace


************************************************************************
* PREPARE GP-PATIENT DATA: IDENTIFY SWAPS
************************************************************************
* Result: patient_gp2.dta, one row per patient-year with the GP at the
* beginning of the year, whether that list spell ended during the year
* (swap), the end date of the spell, and indicators for the reason the
* following spell was created.
use "$rawdata\KUHR\PasientLister.dta", clear
rename opphoeraarsak oppharsak
drop if BehandlerID == . | BehandlerID == 0 | lopenr == "0" | lopenr == ""
save "_temp2.dta", replace

* One year range drives both the yearly loop and the append below.
* Previously the loop built 2010-2020 while the append started from
* _newtemp2001, which either failed or picked up stale files from an
* earlier run.
* NOTE(review): if the panel is meant to start in 2001, change first_year.
local first_year 2010
local last_year  2020

forvalues i = `first_year'/`last_year' {
	use "_temp2.dta", clear
	* Spells still running after 1 January of year i (a missing end date
	* counts as still running)
	keep if tomdato > d(01jan`i')
	gen year = `i'

	* swap = 1 when the spell covering 1 January ends within the year
	gen swap = (fomdato <= d(01jan`i') & tomdato <= d(31dec`i'))
	gen d_swap = tomdato if swap == 1

	* Reason for the patient's NEXT spell. This must be evaluated within
	* patient: the earlier unrestricted [_n+1] took the reason from the
	* following row even when that row belonged to another patient.
	bysort lopenr (fomdato): gen r_swap = opprettelseaarsak[_n+1] if swap == 1

	* Keep only the spell that covers 1 January
	drop if fomdato > d(01jan`i')
	rename BehandlerID gpid
	keep lopenr year gpid swap r_swap d_swap
	save _newtemp`i', replace
}

* Stack the yearly files created above
use _newtemp`first_year', clear
local second_year = `first_year' + 1
forvalues i = `second_year'/`last_year' {
	append using _newtemp`i'
}

label variable gpid "GP in beg. of year"
label variable swap "Swap of GP in this year"
label variable r_swap "Reason for swap"
label variable d_swap "Date of swap"

* Reasons for starting with the new GP.
* Each indicator is 1/0 for swappers and missing when there was no swap.
gen a_swap        = (r_swap == "Automatisk tildeling") if swap == 1
gen o_swap        = (r_swap == "Ordinært bytte") if swap == 1
gen w_swap        = (r_swap == "Tildelt fra venteliste") if swap == 1
gen red_swap      = (r_swap == "Lege har redusert praksis") if swap == 1
gen t_swap        = (r_swap == "Lege har avsluttet praksis") if swap == 1
* red_term_swap is an additional category, not the sum of red_swap and t_swap
gen red_term_swap = (r_swap == "Lege har redusert eller avsluttet praksis") if swap == 1
* Any other reason, including a missing reason
gen other_swap = (a_swap == 0 & o_swap == 0 & w_swap == 0 & red_swap == 0 & t_swap == 0 & red_term_swap == 0) if swap == 1

label variable a_swap "Automatically assigned"
label variable o_swap "Ordinary patient inititated swap"
label variable w_swap "Assigned from waiting list"
label variable red_swap "GP reduced list size"
label variable t_swap "GP terminated list"
label variable red_term_swap "GP reduced or terminated list"
label variable other_swap "Other reason for swap"

drop r_swap

* Keep one row per patient-year. Which duplicate survives is arbitrary.
bysort lopenr year: gen x = _n
count if x > 1
* 439 ---> 846 this is what i get sep 23 2021 rg
drop if x > 1
drop x
save patient_gp2, replace

************************************************************************
* Link GPs' characteristics to patients (PREP_merge_dataset)
************************************************************************
use "gp_charateristics", clear

* Drop GPs whose recorded sex changes across records: their mean of male_gp
* lies strictly between 0 and 1.
* NOTE(review): GPs whose sex is never recorded have a missing mean and are
* not dropped by this condition.
bysort gpid: egen aux = mean(male_gp)
tabulate aux
drop if aux > 0 & aux < 1
drop aux

* Drop GP-years that occur more than once
* 876
bysort gpid year: drop if _N > 1
drop kommunenr kommunenavn

* Attach referral information (referral_gp.dta must already exist in the
* working folder); referral rows without a GP record are dropped
count
merge 1:m gpid year using "referral_gp.dta"
drop if _merge == 2
drop _merge

* Attach the patient lists; keep only GP-years that have patients
count
merge 1:m gpid year using "patient_gp2.dta"
keep if _merge == 3
drop _merge
sort lopenr

* Variables not used
drop legeharavsluttetpraksis automatisktildeling nyepas frafpas ///
	barngjenforentforeldre legeharredusertelleravsluttetpra ///
	innbyggertilbaketiltidligerefast korreksjon manuelltildeling ///
	ordinærtbytte legeharredusertpraksis tildeltfraventeliste ///
	opprettelseskodeikkeoppgitt innbyggererdør innbyggerharforsvunnet ///
	innbyggerharutvandret innbyggerharmeldtsegutavfastlege ///
	opphørskodeikkeoppgitt
compress

* One row per patient: GP-level variables first, then the patient-level swap
* variables, each spread into one column per year
reshape wide gpid age_gp male_gp fellesliste gruppepraksis ///
		list_gp maxlist_gp spec_gp nb_ref nb_ref_short ref_time ///
		swap d_swap a_swap o_swap w_swap red_swap t_swap red_term_swap other_swap ///
		, i(lopenr) j(year)

* Keep patients that are also in the background sample
merge 1:1 lopenr using "sample_background2.dta"
keep if _merge == 3
drop _merge
save FinalSet4, replace




